Code
# We load the MAE object
mae <- load_latest_mae(dir = str_c(data_dir(), "03_augmented_mae/"))
clin <- MultiAssayExperiment::colData(mae) |> as.data.frame() |> as_tibble()# We load the MAE object
mae <- load_latest_mae(dir = str_c(data_dir(), "03_augmented_mae/"))
clin <- MultiAssayExperiment::colData(mae) |> as.data.frame() |> as_tibble()tmp <-
clin |>
filter(!is.na(DAY)) |>
left_join(get_assay_wide_format(mae, "tax_16S_p", add_colData = FALSE) |> mutate(has_16S = TRUE), by = join_by(Barcode)) |>
filter(has_16S) |>
select(USUBJID, ARM, AVISITN, PIPV)
bind_rows(
tmp |>
select(USUBJID, ARM) |>
distinct() |>
dplyr::count(ARM) |>
mutate(Visit = "At least one visit"),
tmp |>
filter(PIPV) |>
mutate(Visit = get_visit_labels(AVISITN)) |>
dplyr::count(ARM, Visit),
tmp |>
filter(PIPV) |>
group_by(USUBJID) |> mutate(n = n()) |> ungroup() |> filter(n == max(n)) |>
select(USUBJID, ARM) |>
distinct() |>
dplyr::count(ARM) |>
mutate(Visit = "All planned in-person visits")
) |>
mutate(
Visit =
Visit |>
factor(
levels =
c(
"At least one visit",
get_visit_labels(c(0:4, 7)) |> levels(),
"All planned in-person visits"
)
)
) |>
dplyr::rename(Arm = ARM) |>
pivot_wider(id_cols = Arm, names_from = Visit, values_from = n) |>
as.data.frame() |> column_to_rownames("Arm") |>
gt(
rownames_to_stub = TRUE,
caption = "Number of participants in each study arm (rows) with an available 16S rRNA sequencing data at any, each, or all planned in-person visits."
) |>
grand_summary_rows(
fns = list(label = "Total", id = "totals", fn = "sum"),
fmt = ~ fmt_integer(.),
side = "top"
) | At least one visit | Pre-MTZ | Post-MTZ | Week 4 | Week 8 | Week 12 | Week 24 | All planned in-person visits | |
|---|---|---|---|---|---|---|---|---|
| Total | 211 | 208 | 203 | 181 | 172 | 179 | 165 | 139 |
| LACTIN-V | 142 | 140 | 138 | 124 | 120 | 123 | 113 | 99 |
| Placebo | 69 | 68 | 65 | 57 | 52 | 56 | 52 | 40 |
g <-
clin |>
filter(!is.na(DAY)) |>
left_join(get_assay_wide_format(mae, "tax_16S_p", add_colData = FALSE) |> mutate(has_16S = TRUE), by = join_by(Barcode)) |>
filter(has_16S) |>
ggplot() +
aes(x = DAY, y = USUBJID, col = AVISITN |> floor() |> get_visit_labels(), shape = VISIT_TYPE) +
geom_point() +
facet_grid(ARM ~ ., scales = "free_y", space = "free_y") +
scale_color_brewer("Visit", type = "qual", palette = 3, guide = guide_legend(direction = "vertical")) +
scale_shape_manual("Visit type", values = c(16, 3), guide = guide_legend(direction = "vertical")) +
theme(axis.text.y = element_blank(), legend.position = "right") +
labs(
x = "Study day",
y = "Participants",
subtitle = "Visits with 16S rRNA sequencing data"
)
g ggsave(g, filename = str_c(fig_out_dir(), "S1A.pdf"), width = 6, height = 8, device = cairo_pdf)clin |>
filter(AVISITN == 0, !is.na(DAY)) |>
ggplot(aes(x = DAY + 5)) + geom_histogram(binwidth = 1) +
labs(
title = "Screening visits timing distribution",
x = "Day relative to first MTZ dose",
y = "Number of participants"
)# j <- which(mae$ARM == "Placebo")
# mae_Placebo <- mae[,j,]
# mae_Placebo <- mae_Placebo[,,c(3,9)]
# save(mae_Placebo, file = "mae_Placebo.RData")mae <- load_latest_mae(str_c(data_dir(), "03_augmented_mae/"))Loading mae_20250809.RDS
clin <- MultiAssayExperiment::colData(mae) |> data.frame() |> as_tibble()race_and_edu <-
clin |>
filter(ARM %in% c("LACTIN-V", "Placebo"), !is.na(EDULVL)) |>
select(USUBJID, RACEGR1, RACEGR2, EDULVL) |>
distinct() |>
mutate(RACEGR2 = RACEGR2 |> fct_relevel("Other", after = 1)) |>
arrange(RACEGR2) |>
mutate(RACEGR2 = RACEGR2 |> as.character() |> str_wrap(15) |> fct_inorder())
(Xsq <- chisq.test(table(race_and_edu$EDULVL, race_and_edu$RACEGR2)))Warning in chisq.test(table(race_and_edu$EDULVL, race_and_edu$RACEGR2)):
Chi-squared approximation may be incorrect
Pearson's Chi-squared test
data: table(race_and_edu$EDULVL, race_and_edu$RACEGR2)
X-squared = 60.877, df = 8, p-value = 3.135e-10
t_1 <- race_and_edu |> dplyr::count(RACEGR2, EDULVL)
g_1 <-
ggplot(t_1, aes(x = RACEGR2, y = EDULVL, fill = RACEGR2)) +
geom_tile(aes(alpha = n)) +
geom_text(aes(label = n)) +
# scale_alpha(range = c(0,1)) +
# scale_fill_gradient(low = "white", high = "steelblue") +
xlab("Self-declared participants' race") + ylab("") +
scale_fill_manual(values = c("purple1", "darkseagreen2", "gold3")) +
guides(fill = "none", alpha = "none")
g_2 <-
race_and_edu |>
arrange(RACEGR2, RACEGR1) |>
mutate(RACEGR1 = RACEGR1 |> str_wrap(12) |> fct_inorder()) |>
dplyr::count(RACEGR2, RACEGR1, EDULVL) |>
ggplot(aes(x = RACEGR1, y = EDULVL, fill = RACEGR2)) +
geom_tile(aes(alpha = n)) +
geom_text(aes(label = n)) +
# scale_fill_gradient(low = "white", high = "steelblue") +
xlab("Self-declared participants' race (detailed)") + ylab("") +
scale_fill_manual(values = c("purple1", "darkseagreen2", "gold3")) +
guides(fill = "none", alpha = "none")g_1 + g_2 + plot_layout(guides = "collect", widths = c(4, 8))mae <- load_latest_mae(str_c(data_dir(), "03_augmented_mae/"))Loading mae_20250809.RDS
clin <- MultiAssayExperiment::colData(mae) |> data.frame() |> as_tibble()In this documents, we perform a series of check to ensure that the two groups (Lactin-V and placebo recipients) have similar characteristics at baseline.
For each of the domains (demographic variables, microbiota composition, cytokine/chemokine concentrations, behavioral variable), we check that balance for participants included for evaluating the primary outcome (= L. crispatus ≥ 50%) at week 12. We also repeat that analysis for participants with 16S data at week 24.
week12_participants <- clin$USUBJID[!is.na(clin$AVISITN) & (clin$AVISITN == 4)]
week24_participants <- clin$USUBJID[!is.na(clin$AVISITN) & (clin$AVISITN == 7)]dem_data <-
MultiAssayExperiment::colData(mae[,,"ASV_16S"]) |>
as_tibble() |>
select(USUBJID, ARM, AGE, RACEGR2, ETHNIC, EDULVL, N_PAST_BV) |>
dplyr::rename(
Age = AGE,
`Self-declared race` = RACEGR2,
Ethnicity = ETHNIC,
Education = EDULVL,
`Nb of past BV episodes` = N_PAST_BV
)Warning: 'experiments' dropped; see 'drops()'
harmonizing input:
removing 27361 sampleMap rows not in names(experiments)
removing 762 colData rownames not in sampleMap 'primary'
dem_data |>
filter(USUBJID %in% week12_participants) |>
distinct() |>
select(-USUBJID) |>
tbl_summary(by = ARM) |>
add_p() |>
modify_caption(caption = "Demographic variables of participants with microbiota composition data at week 12")| Characteristic | LACTIN-V N = 1231 |
Placebo N = 561 |
p-value2 |
|---|---|---|---|
| Age | 30 (25, 36) | 32 (25, 37) | 0.8 |
| Self-declared race | 0.7 | ||
| Black/African American | 44 (36%) | 20 (36%) | |
| White | 52 (42%) | 21 (38%) | |
| Other | 27 (22%) | 15 (27%) | |
| Ethnicity | 0.8 | ||
| NOT HISPANIC OR LATINO | 98 (80%) | 44 (79%) | |
| HISPANIC OR LATINO | 25 (20%) | 12 (21%) | |
| NOT REPORTED | 0 (0%) | 0 (0%) | |
| UNKNOWN | 0 (0%) | 0 (0%) | |
| Education | 0.6 | ||
| Did not complete high school | 10 (8.1%) | 1 (1.8%) | |
| Completed high school | 46 (37%) | 21 (38%) | |
| Completed junior college | 19 (15%) | 10 (18%) | |
| Completed college (undergraduate degree) | 35 (28%) | 19 (34%) | |
| Completed graduate degree | 13 (11%) | 5 (8.9%) | |
| Nb of past BV episodes | 0.8 | ||
| None | 7 (5.7%) | 4 (7.1%) | |
| 1-2 | 27 (22%) | 10 (18%) | |
| 3-4 | 20 (16%) | 7 (13%) | |
| 5 or more | 61 (50%) | 29 (52%) | |
| Unknown | 8 (6.5%) | 6 (11%) | |
| 1 Median (Q1, Q3); n (%) | |||
| 2 Wilcoxon rank sum test; Pearson’s Chi-squared test; Fisher’s exact test | |||
dem_data |>
filter(USUBJID %in% week24_participants) |>
distinct() |>
select(-USUBJID) |>
tbl_summary(by = ARM) |>
add_p() |>
modify_caption(caption = "Demographic variables of participants with microbiota composition data at week 24")| Characteristic | LACTIN-V N = 1131 |
Placebo N = 521 |
p-value2 |
|---|---|---|---|
| Age | 30 (25, 36) | 33 (26, 37) | 0.6 |
| Self-declared race | 0.8 | ||
| Black/African American | 39 (35%) | 20 (38%) | |
| White | 48 (42%) | 19 (37%) | |
| Other | 26 (23%) | 13 (25%) | |
| Ethnicity | 0.7 | ||
| NOT HISPANIC OR LATINO | 90 (80%) | 40 (77%) | |
| HISPANIC OR LATINO | 23 (20%) | 12 (23%) | |
| NOT REPORTED | 0 (0%) | 0 (0%) | |
| UNKNOWN | 0 (0%) | 0 (0%) | |
| Education | 0.6 | ||
| Did not complete high school | 9 (8.0%) | 1 (1.9%) | |
| Completed high school | 40 (35%) | 21 (40%) | |
| Completed junior college | 17 (15%) | 10 (19%) | |
| Completed college (undergraduate degree) | 34 (30%) | 15 (29%) | |
| Completed graduate degree | 13 (12%) | 5 (9.6%) | |
| Nb of past BV episodes | 0.7 | ||
| None | 6 (5.3%) | 4 (7.7%) | |
| 1-2 | 25 (22%) | 9 (17%) | |
| 3-4 | 17 (15%) | 6 (12%) | |
| 5 or more | 57 (50%) | 27 (52%) | |
| Unknown | 8 (7.1%) | 6 (12%) | |
| 1 Median (Q1, Q3); n (%) | |||
| 2 Wilcoxon rank sum test; Pearson’s Chi-squared test; Fisher’s exact test | |||
Species composition
species <- MultiAssayExperiment::assay(mae, "tax_16S_p") %>% t()We first compare the species composition at the first two visits (pre- and post-MTZ) for participants included for primary outcome at week 12.
PCoA_baseline(
mae = mae, assay_name = "tax_16S_p",
visit = 0, included_participants = week12_participants
)PCoA_baseline(
mae = mae, assay_name = "tax_16S_p",
visit = 1, included_participants = week12_participants
)And for participants with microbiota composition at week 24.
PCoA_baseline(
mae = mae, assay_name = "tax_16S_p",
visit = 0, included_participants = week24_participants
)PCoA_baseline(
mae = mae, assay_name = "tax_16S_p",
visit = 1, included_participants = week24_participants
)ASV composition
set.seed(12345)
g1 <-
PCoA_baseline(
mae = mae, assay_name = "ASV_16S_filtered",
visit = 0, included_participants = week12_participants
)
g2 <-
PCoA_baseline(
mae = mae, assay_name = "ASV_16S_filtered",
visit = 1, included_participants = week12_participants
)
g1g2 ggsave(g1, filename = str_c(fig_out_dir(), "S1B.pdf"), width = 13, height = 4, device = cairo_pdf) ggsave(g2, filename = str_c(fig_out_dir(), "S1C.pdf"), width = 13, height = 4, device = cairo_pdf)And for participants with microbiota composition at week 24.
set.seed(12345)
PCoA_baseline(
mae = mae, assay_name = "ASV_16S_filtered",
visit = 0, included_participants = week24_participants
)PCoA_baseline(
mae = mae, assay_name = "ASV_16S_filtered",
visit = 1, included_participants = week24_participants
)Topic composition
gammas <-
get_assay_long_format(
mae, assayname = "c_topics_16S_8", add_rowData = TRUE,
feature_name = "topic", values_name = "prop"
)plot_topic_composition_1_visit(gammas, visit = 0) +
labs(subtitle = "Screening visit")test_v0 <- test_topic_comp_by_ARM(gammas, visit = 0)Warning in DirichletReg::DR_data(y): some entries are 0 or 1 => transformation
forced
test_v0$full %>% summary()Call:
DirichletReg::DirichReg(formula = y ~ ARM, data = data)
Standardized Residuals:
Min 1Q Median 3Q Max
L. crispatus -0.5435 -0.5397 -0.5355 -0.5211 2.2653
L. iners -0.7271 -0.7036 -0.6163 -0.3139 5.2491
L. jensenii -0.5046 -0.5046 -0.5030 -0.4941 4.1606
Other L. -0.4960 -0.4960 -0.4960 -0.4937 6.0511
G. s./l. topic -1.3330 -0.7035 0.3135 1.1489 2.8858
P. amnii topic -0.9743 -0.7961 -0.1684 0.8594 3.6323
Ca. L. v. (BVAB1) topic -0.7046 -0.7024 -0.6555 1.6499 5.5613
P. bivia topic -0.6826 -0.6615 -0.6096 0.1651 5.4358
------------------------------------------------------------------
Beta-Coefficients for variable no. 1: L. crispatus
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.58571 0.09307 -17.038 <2e-16 ***
ARMPlacebo -0.01513 0.16572 -0.091 0.927
------------------------------------------------------------------
Beta-Coefficients for variable no. 2: L. iners
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.06996 0.09207 -11.622 <2e-16 ***
ARMPlacebo 0.02326 0.16367 0.142 0.887
------------------------------------------------------------------
Beta-Coefficients for variable no. 3: L. jensenii
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.730140 0.093267 -18.550 <2e-16 ***
ARMPlacebo 0.002702 0.166056 0.016 0.987
------------------------------------------------------------------
Beta-Coefficients for variable no. 4: Other L.
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.76144 0.09331 -18.878 <2e-16 ***
ARMPlacebo 0.03275 0.16608 0.197 0.844
------------------------------------------------------------------
Beta-Coefficients for variable no. 5: G. s./l. topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.30113 0.08981 -3.353 0.0008 ***
ARMPlacebo 0.23248 0.15809 1.471 0.1414
------------------------------------------------------------------
Beta-Coefficients for variable no. 6: P. amnii topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.83048 0.09144 -9.082 <2e-16 ***
ARMPlacebo 0.29098 0.16127 1.804 0.0712 .
------------------------------------------------------------------
Beta-Coefficients for variable no. 7: Ca. L. v. (BVAB1) topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.13225 0.09221 -12.278 <2e-16 ***
ARMPlacebo -0.04403 0.16417 -0.268 0.789
------------------------------------------------------------------
Beta-Coefficients for variable no. 8: P. bivia topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.24259 0.09246 -13.440 <2e-16 ***
ARMPlacebo 0.09966 0.16421 0.607 0.544
------------------------------------------------------------------
Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Log-likelihood: 3012 on 16 df (78 BFGS + 1 NR Iterations)
AIC: -5992, BIC: -5942
Number of Observations: 165
Link: Log
Parametrization: common
anova(test_v0$full, test_v0$null)Analysis of Deviance Table
Model 1: DirichletReg::DirichReg(formula = y ~ ARM, data = data)
Model 2: DirichletReg::DirichReg(formula = y ~ 1, data = data)
Deviance N. par Difference df Pr(>Chi)
Model 1 -6023.6 16
Model 2 -6018.7 8 4.966 8 0.7612
No significant differences in microbiota composition at baseline when characterizing microbiota with topic composition.
plot_topic_composition_1_visit(gammas, visit = 1) +
labs(subtitle = "Randomization visit")test_v1 <- test_topic_comp_by_ARM(gammas, visit = 1)Warning in DirichletReg::DR_data(y): some entries are 0 or 1 => transformation
forced
test_v1$full |> summary()Call:
DirichletReg::DirichReg(formula = y ~ ARM, data = data)
Standardized Residuals:
Min 1Q Median 3Q Max
L. crispatus -0.5992 -0.5839 -0.5697 -0.5384 4.3991
L. iners -1.6329 -0.5201 0.5791 1.8365 2.6819
L. jensenii -0.6164 -0.6148 -0.6011 0.1677 6.7281
Other L. -0.5461 -0.5461 -0.5448 -0.4932 6.2604
G. s./l. topic -0.9899 -0.7721 -0.4348 0.6082 3.3974
P. amnii topic -0.6527 -0.6037 -0.5801 -0.2980 3.4925
Ca. L. v. (BVAB1) topic -0.6143 -0.6105 -0.5644 -0.3939 6.8855
P. bivia topic -0.6929 -0.6492 -0.5593 -0.3589 4.1953
------------------------------------------------------------------
Beta-Coefficients for variable no. 1: L. crispatus
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.35436 0.09292 -14.576 <2e-16 ***
ARMPlacebo -0.03243 0.16840 -0.193 0.847
------------------------------------------------------------------
Beta-Coefficients for variable no. 2: L. iners
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.20237 0.08715 2.322 0.0202 *
ARMPlacebo 0.08262 0.15640 0.528 0.5973
------------------------------------------------------------------
Beta-Coefficients for variable no. 3: L. jensenii
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.3033 0.0928 -14.043 <2e-16 ***
ARMPlacebo -0.0160 0.1681 -0.095 0.924
------------------------------------------------------------------
Beta-Coefficients for variable no. 4: Other L.
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.52298 0.09326 -16.330 <2e-16 ***
ARMPlacebo 0.02450 0.16889 0.145 0.885
------------------------------------------------------------------
Beta-Coefficients for variable no. 5: G. s./l. topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.79417 0.09131 -8.698 <2e-16 ***
ARMPlacebo 0.34307 0.16333 2.101 0.0357 *
------------------------------------------------------------------
Beta-Coefficients for variable no. 6: P. amnii topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.34068 0.09289 -14.433 <2e-16 ***
ARMPlacebo 0.17043 0.16768 1.016 0.309
------------------------------------------------------------------
Beta-Coefficients for variable no. 7: Ca. L. v. (BVAB1) topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.30922 0.09282 -14.105 <2e-16 ***
ARMPlacebo -0.12367 0.16848 -0.734 0.463
------------------------------------------------------------------
Beta-Coefficients for variable no. 8: P. bivia topic
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.1811 0.0925 -12.768 <2e-16 ***
ARMPlacebo 0.1178 0.1671 0.705 0.481
------------------------------------------------------------------
Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Log-likelihood: 2703 on 16 df (78 BFGS + 1 NR Iterations)
AIC: -5374, BIC: -5324
Number of Observations: 161
Link: Log
Parametrization: common
anova(test_v1$full, test_v1$null)Analysis of Deviance Table
Model 1: DirichletReg::DirichReg(formula = y ~ ARM, data = data)
Model 2: DirichletReg::DirichReg(formula = y ~ 1, data = data)
Deviance N. par Difference df Pr(>Chi)
Model 1 -5405.7 16
Model 2 -5399.4 8 6.2048 8 0.6243
gammas |>
filter(AVISITN %in% 0:1, EOSSTT == "COMPLETED", !is.na(ARM)) |>
ggplot() +
aes(x = topic_label, y = prop, fill = ARM) +
geom_boxplot(outlier.size = 0.5) +
facet_grid(. ~ ifelse(AVISITN == 0, "Screening (pre-MTZ)", "Randomization (post-MTZ)") |> fct_inorder()) +
scale_fill_manual("", values = get_arm_colors()) +
theme(
strip.background = element_rect(fill = "gray", color = NA),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top"
) +
xlab("Topic")At the randomization visit, there are slightly more participants with subcommunity IV-B in the Placebo arm than in the Lactin-V arm.
CST distribution
CSTs <-
get_assay_wide_format(mae, "CST") |>
unnest(cols = c(assay)) |>
mutate(Visit = str_c("Visit ", AVISITN))For participants with microbiota data at week 12
week12_CSTs <-
CSTs |> filter(AVISITN %in% 0:1, USUBJID %in% week12_participants)
week12_CSTs |>
ggplot(aes(x = subCST, fill = CST)) +
geom_bar() +
facet_grid(ARM ~ Visit, scales = "free_y") +
ylab("Nb of participants") +
scale_fill_manual(values = get_topic_colors(week12_CSTs$CST |> unique() |> sort()))week12_CSTs_V0 <-
week12_CSTs |>
filter(AVISITN == 0) |>
select(subCST, ARM)
(
V0_chisq <-
chisq.test(table(week12_CSTs_V0), simulate.p.value = TRUE)
)
Pearson's Chi-squared test with simulated p-value (based on 2000
replicates)
data: table(week12_CSTs_V0)
X-squared = 5.7833, df = NA, p-value = 0.4878
week12_CSTs_V1 <-
week12_CSTs |>
filter(AVISITN == 1) |>
select(subCST, ARM)
(
V1_chisq <-
chisq.test(table(week12_CSTs_V1), simulate.p.value = TRUE)
)
Pearson's Chi-squared test with simulated p-value (based on 2000
replicates)
data: table(week12_CSTs_V1)
X-squared = 13.312, df = NA, p-value = 0.02449
For participants with microbiota data at week 24
week24_CSTs <-
CSTs |> filter(AVISITN %in% 0:1, USUBJID %in% week24_participants)
week24_CSTs |>
ggplot(aes(x = subCST, fill = CST)) +
geom_bar() +
facet_grid(ARM ~ Visit, scales = "free_y") +
ylab("Nb of participants") +
scale_fill_manual(values = get_topic_colors(week12_CSTs$CST |> unique() |> sort()))week24_CSTs_V0 <-
week24_CSTs |>
filter(AVISITN == 0) |>
select(subCST, ARM)
(
V0_chisq <-
chisq.test(table(week24_CSTs_V0), simulate.p.value = TRUE)
)
Pearson's Chi-squared test with simulated p-value (based on 2000
replicates)
data: table(week24_CSTs_V0)
X-squared = 4.815, df = NA, p-value = 0.6167
week24_CSTs_V1 <-
week24_CSTs |>
filter(AVISITN == 1) |>
select(subCST, ARM)
(
V1_chisq <-
chisq.test(table(week24_CSTs_V1), simulate.p.value = TRUE)
)
Pearson's Chi-squared test with simulated p-value (based on 2000
replicates)
data: table(week24_CSTs_V1)
X-squared = 13.215, df = NA, p-value = 0.03248
No differences at baseline, but some differences post-MTZ, at randomization.
library(tidyverse)
library(patchwork)
library(gtsummary)
library(magrittr)
tmp <- fs::dir_map("R", source)
tmp <- fs::dir_map("../../R/", source)
theme_set(theme_publication())mae <- load_latest_mae(str_c(data_dir(), "03_augmented_mae/"))Loading mae_20250809.RDS
clin <- MultiAssayExperiment::colData(mae) |> data.frame() |> as_tibble()assay_name <- "tax_16S_p"
mb_wide <- get_assay_wide_format(mae, assay_name)
mb_wide <- mb_wide |> mutate(assay = assay |> as.data.frame() |> set_rownames(Barcode))
mb_wide_v0 <- mb_wide |> filter(AVISITN == 0, !is.na(ARM))
distance <- "bray"
comp_data <- mb_wide_v0$assay
BC_dist <- vegan::vegdist(comp_data, method = distance)
mds <- cmdscale(BC_dist, k = 4, eig = TRUE)
g_eig <-
ggplot(tibble(k = 1:20, eig = mds$eig[1:20]), aes(x = k, y = eig)) +
geom_bar(stat = "identity") +
labs(title = "Top eigenvalues", subtitle = str_c('MDS using "',distance,'" on ', assay_name, ' data'))
mds_df <-
tibble(
PCo1 = mds$points[, 1],
PCo2 = mds$points[, 2],
PCo3 = mds$points[, 3],
PCo4 = mds$points[, 4],
Barcode = rownames(mds$points)
) |>
left_join(mb_wide_v0, by = "Barcode")
fit_ARM <- vegan::adonis2(comp_data ~ ARM, data = mb_wide_v0, permutations=999, method=distance)
fit_RACE <- vegan::adonis2(comp_data ~ RACEGR2, data = mb_wide_v0, permutations=999, method=distance)
fit_SITE <- vegan::adonis2(comp_data ~ ARM + RACEGR2 + SITENAME, data = mb_wide_v0, permutations=999, method=distance)
g_arms <-
ggplot(mds_df, aes(x = PCo1, y = PCo2, col = ARM)) +
geom_point() +
coord_fixed() +
theme(legend.position = "right") +
annotate(
"label", x = Inf, y = Inf, hjust = 1, vjust = 1,
label = str_c("p: ", (fit_ARM$`Pr(>F)`[1]) %>% round(2))
) +
scale_color_manual("", values = get_arm_colors(), guide = guide_legend(direction = "vertical")) +
ggtitle("PCoA by study arm")
g_race <-
ggplot(mds_df, aes(x = PCo1, y = PCo2, col = RACEGR2)) +
geom_point() +
coord_fixed() +
theme(legend.position = "right") +
annotate(
"label", x = Inf, y = Inf, hjust = 1, vjust = 1,
label = str_c("p: ", (fit_RACE$`Pr(>F)`[1]) %>% round(6))
) +
scale_color_manual(
"Self-declared race", values = c("purple1", "darkseagreen2", "gold3"),
guide = guide_legend(direction = "vertical")
) +
ggtitle("PCoA by participants' self-declared race")
g_site <-
ggplot(mds_df, aes(x = PCo1, y = PCo2, col = SITENAME)) +
geom_point() +
coord_fixed() +
theme(legend.position = "right") +
annotate(
"label", x = Inf, y = Inf, hjust = 1, vjust = 1,
label = str_c("p: ", (fit_SITE$`Pr(>F)`[1]) |> round(3))
) +
scale_color_brewer(
"Study site", type = "qual", palette = 7,
guide = guide_legend(direction = "vertical")
) +
ggtitle("PCoA by study site")g_arms +
g_race +
plot_spacer() +
g_site +
plot_layout(ncol = 2, widths = c(1, 1.2))df_load <- clin |> select(USUBJID, AVISITN, LOAD, RACEGR2) |> filter(AVISITN == 1)
g_load <-
df_load |> filter(!is.na(LOAD)) |>
ggplot() +
aes(x = RACEGR2, y = log10(LOAD + 1), fill = RACEGR2) +
geom_boxplot(alpha = 0.5, outlier.shape = NULL) +
geom_point(alpha = 0.5) +
labs(
x = "Self-declared race",
y = "log10(post-MTZ total bacterial load + 1)"
) +
scale_fill_manual(
"Self-declared race", values = c("purple1", "darkseagreen2", "gold3")
) +
guides(fill = "none")
df_shannon_v0 <-
mb_wide_v0 |>
mutate(
shannon_v0 = assay |> vegan::diversity(index = "shannon")
) |>
select(USUBJID, shannon_v0)
df_load <- df_load |> left_join(df_shannon_v0, by = join_by(USUBJID))
g_shannon <-
df_load |> filter(!is.na(shannon_v0)) |>
ggplot() +
aes(x = RACEGR2, y = shannon_v0, fill = RACEGR2) +
geom_boxplot(alpha = 0.5, outlier.shape = NULL) +
geom_point(alpha = 0.5) +
labs(
x = "Self-declared race",
y = "pre-MTZ α-diversity\n(Shannon index on taxa proportions)"
) +
scale_fill_manual(
"Self-declared race", values = c("purple1", "darkseagreen2", "gold3")
) +
guides(fill = "none")
g_lm <-
df_load |>
filter(!is.na(LOAD), !is.na(shannon_v0)) |>
ggplot(aes(x = shannon_v0, y = log10(LOAD + 1))) +
geom_point() +
geom_smooth(method = "lm", formula = 'y ~ x') +
labs(
x = "pre-MTZ α-diversity\n(Shannon index on taxa proportions)",
y = "log10(post-MTZ total bacterial load + 1)"
)g_load + g_shannon + g_lm race_and_edu <-
mae@colData |>
as_tibble() |>
filter(ARM %in% c("LACTIN-V", "Placebo"), !is.na(EDULVL)) |>
select(USUBJID, RACEGR1, RACEGR2, EDULVL) |>
distinct() |>
mutate(RACEGR2 = RACEGR2 |> fct_relevel("Other", after = 1)) |>
arrange(RACEGR2) |>
mutate(RACEGR2 = RACEGR2 |> as.character() |> str_wrap(15) |> fct_inorder())
(Xsq <- chisq.test(table(race_and_edu$EDULVL, race_and_edu$RACEGR2), simulate.p.value = TRUE))
Pearson's Chi-squared test with simulated p-value (based on 2000
replicates)
data: table(race_and_edu$EDULVL, race_and_edu$RACEGR2)
X-squared = 60.877, df = NA, p-value = 0.0004998
t_1 <- race_and_edu |> dplyr::count(RACEGR2, EDULVL)
g_1 <-
ggplot(t_1, aes(x = RACEGR2, y = EDULVL, fill = RACEGR2)) +
geom_tile(aes(alpha = n)) +
geom_text(aes(label = n)) +
# scale_alpha(range = c(0,1)) +
# scale_fill_gradient(low = "white", high = "steelblue") +
xlab("Self-declared participants' race") + ylab("Education") +
scale_fill_manual(values = c("purple1", "darkseagreen3", "gold3")) +
guides(fill = "none", alpha = "none") +
theme(
axis.title.y = element_text(angle = 0, hjust = 1, vjust = 1, margin = margin(r = -80))
)df_shannon_v0 <-
get_assay_wide_format(mae, "tax_16S_p", add_colData = FALSE) |>
mutate(
shannon_v0 = assay |> vegan::diversity(index = "shannon")
) |>
select(Barcode, shannon_v0) |>
left_join(
mae@colData |>
as_tibble() |>
select(Barcode, USUBJID, ARM, AVISITN, RACEGR2) |>
distinct(),
by = join_by(Barcode)
) |>
filter(AVISITN == 0, !is.na(ARM))
lm(shannon_v0 ~ RACEGR2, data = df_shannon_v0 |> mutate(RACEGR2 = RACEGR2 |> fct_relevel("White"))) |> summary()
Call:
lm(formula = shannon_v0 ~ RACEGR2, data = mutate(df_shannon_v0,
RACEGR2 = fct_relevel(RACEGR2, "White")))
Residuals:
Min 1Q Median 3Q Max
-1.9720 -0.1511 0.1097 0.3559 0.9980
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.95844 0.06149 31.851 <2e-16 ***
RACEGR2Black/African American 0.18528 0.08780 2.110 0.036 *
RACEGR2Other 0.14733 0.09855 1.495 0.136
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.55 on 205 degrees of freedom
Multiple R-squared: 0.02315, Adjusted R-squared: 0.01362
F-statistic: 2.43 on 2 and 205 DF, p-value: 0.09061
g_shannon <-
df_shannon_v0 |>
ggplot() +
aes(x = RACEGR2, y = shannon_v0, fill = RACEGR2) +
# geom_boxplot(alpha = 0.5, outlier.shape = NULL) +
geom_violin(alpha = 0.5, draw_quantiles = 0.5, color = "white", linewidth = 1) +
# geom_point(alpha = 0.5) +
ggbeeswarm::geom_quasirandom(width = 0.3, size = 0.5, aes(col = RACEGR2)) +
labs(
x = "Self-declared race",
y = "pre-MTZ α-diversity\n(Shannon index on taxa proportions)"
) +
scale_fill_manual(
"Self-declared race", values = c("purple1", "darkseagreen3", "gold3")
) +
scale_color_manual(
"Self-declared race", values = c("purple1", "darkseagreen3", "gold3")
) +
guides(fill = "none", col = "none")i <- 2
g <-
g_1 + labs(tag = LETTERS[i]) +
g_shannon + labs(tag = LETTERS[i + 1])
gggsave(g, filename = str_c(fig_out_dir(), "S7BC.pdf"), width = 13, height = 4, device = cairo_pdf)sessionInfo()R version 4.4.2 (2024-10-31)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Brussels
tzcode source: internal
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] gtsummary_2.2.0 phyloseq_1.50.0 ggthemes_5.1.0 ggrepel_0.9.6
[5] gt_1.0.0 patchwork_1.3.0 magrittr_2.0.3 lubridate_1.9.4
[9] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4
[13] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.2
[17] tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] sandwich_3.1-1 DirichletReg_0.7-2
[3] permute_0.9-7 rlang_1.1.6
[5] ade4_1.7-23 matrixStats_1.5.0
[7] compiler_4.4.2 mgcv_1.9-1
[9] vctrs_0.6.5 reshape2_1.4.4
[11] pkgconfig_2.0.3 crayon_1.5.3
[13] fastmap_1.2.0 backports_1.5.0
[15] XVector_0.46.0 labeling_0.4.3
[17] rmarkdown_2.29 markdown_2.0
[19] tzdb_0.5.0 ggbeeswarm_0.7.2
[21] UCSC.utils_1.2.0 miscTools_0.6-28
[23] xfun_0.52 MultiAssayExperiment_1.32.0
[25] zlibbioc_1.52.0 litedown_0.7
[27] GenomeInfoDb_1.42.3 jsonlite_2.0.0
[29] biomformat_1.34.0 rhdf5filters_1.18.1
[31] DelayedArray_0.32.0 Rhdf5lib_1.28.0
[33] broom_1.0.8 parallel_4.4.2
[35] cluster_2.1.8.1 R6_2.6.1
[37] stringi_1.8.7 RColorBrewer_1.1-3
[39] GenomicRanges_1.58.0 Rcpp_1.0.14
[41] SummarizedExperiment_1.36.0 iterators_1.0.14
[43] knitr_1.50 zoo_1.8-14
[45] base64enc_0.1-3 IRanges_2.40.1
[47] BiocBaseUtils_1.8.0 Matrix_1.7-3
[49] splines_4.4.2 igraph_2.1.4
[51] timechange_0.3.0 tidyselect_1.2.1
[53] rstudioapi_0.17.1 abind_1.4-8
[55] yaml_2.3.10 vegan_2.6-10
[57] maxLik_1.5-2.1 codetools_0.2-20
[59] lattice_0.22-6 plyr_1.8.9
[61] Biobase_2.66.0 withr_3.0.2
[63] evaluate_1.0.3 survival_3.8-3
[65] xml2_1.3.8 Biostrings_2.74.1
[67] pillar_1.10.2 MatrixGenerics_1.18.1
[69] foreach_1.5.2 stats4_4.4.2
[71] generics_0.1.4 S4Vectors_0.44.0
[73] hms_1.1.3 commonmark_1.9.5
[75] scales_1.4.0 glue_1.8.0
[77] tools_4.4.2 data.table_1.17.4
[79] fs_1.6.6 rhdf5_2.50.2
[81] ape_5.8-1 cards_0.6.0
[83] colorspace_2.1-1 nlme_3.1-168
[85] GenomeInfoDbData_1.2.13 beeswarm_0.4.0
[87] vipor_0.4.7 cardx_0.2.4
[89] Formula_1.2-5 cli_3.6.5
[91] S4Arrays_1.6.0 gtable_0.3.6
[93] sass_0.4.10 digest_0.6.37
[95] BiocGenerics_0.52.0 SparseArray_1.6.2
[97] htmlwidgets_1.6.4 farver_2.1.2
[99] htmltools_0.5.8.1 multtest_2.62.0
[101] lifecycle_1.0.4 httr_1.4.7
[103] MASS_7.3-65